// .........................................................................
// Title: process_trace.do
//
// Imports TRACE data on bond transactions for US sample; computes returns
// .........................................................................

* --------------------------------------
* Importing TRACE: prices
* Get median prices from clean version of trace
* --------------------------------------
    
* process trace enhanced
use "$raw/trace/trace_enhanced_clean", clear
keep cusip_id bond_sym_id trd_exctn_dt trd_exctn_tm entrd_vol_qt rptd_pr
drop if missing(cusip)
gsort cusip trd_exctn_dt
by cusip_id trd_exctn_dt: egen price = median(rptd_pr)
by cusip_id trd_exctn_dt: keep if _n == 1
keep cusip_id trd_exctn_dt price
rename trd_exctn_dt date
rename cusip_id cusip
rename price trace_median_price
save "$tmp/trace_enhanced_clean_prices", replace

* also process trace standard (for latest few months not covered in enhanced)
use "$raw/trace/trace_standard_clean", clear
keep if trd_exctn_dt > td(30jun2020)
keep cusip_id bond_sym_id trd_exctn_dt trd_exctn_tm rptd_pr
drop if missing(cusip)
gsort cusip trd_exctn_dt
by cusip_id trd_exctn_dt: egen price = median(rptd_pr)
by cusip_id trd_exctn_dt: keep if _n == 1
keep cusip_id trd_exctn_dt price
rename trd_exctn_dt date
rename cusip_id cusip
rename price trace_median_price
save "$tmp/trace_standard_clean_prices", replace

* append the two
use "$tmp/trace_enhanced_clean_prices", clear
append using "$tmp/trace_standard_clean_prices"
winsor2 trace_median_price, cuts(.1 99.9)
save "$tmp/trace_clean_prices", replace

* collapse to weekly frequency
use "$tmp/trace_clean_prices", clear
gen date_w = wofd(date)
format %tw date_w
gsort cusip date_w date
by cusip date_w: keep if _n == _N
drop date
gsort cusip date_w
order cusip date_w
save "$tmp/trace_clean_prices_w", replace

* collapse to monthly frequency
use "$tmp/trace_clean_prices", clear
gen date_m = mofd(date)
format %tm date_m
gsort cusip date_m date
by cusip date_m: keep if _n == _N
drop date
gsort cusip date_m
order cusip date_m
save "$tmp/trace_clean_prices_m", replace

* collapse to quarterly frequency
use "$tmp/trace_clean_prices", clear
gen date_q = qofd(date)
format %tq date_q
gsort cusip date_q date
by cusip date_q: keep if _n == _N
drop date
gsort cusip date_q
order cusip date_q
save "$tmp/trace_clean_prices_q", replace

* collapse to yearly frequency
use "$tmp/trace_clean_prices", clear
gen year = year(date)
gsort cusip year date
by cusip year: keep if _n == _N
drop date
gsort cusip year
order cusip year
save "$tmp/trace_clean_prices_y", replace
    
* universe of bonds in trace
use "$tmp/trace_clean_prices", clear
gsort cusip
by cusip: keep if _n == 1
gen in_trace_universe = 1
keep cusip in_trace_universe
drop if missing(cusip)
save "$tmp/trace_clean_catalog", replace

* --------------------------------------
* Importing TRACE: yields
* --------------------------------------

* process trace enhanced
use "$raw/trace/trace_enhanced_clean", clear
drop if missing(cusip)
keep cusip_id bond_sym_id trd_exctn_dt trd_exctn_tm entrd_vol_qt rptd_pr yld_pt
drop if missing(yld_pt)
gsort cusip trd_exctn_dt
by cusip_id trd_exctn_dt: egen yield = median(yld_pt)
by cusip_id trd_exctn_dt: keep if _n == 1
keep cusip_id trd_exctn_dt yield
rename trd_exctn_dt date
rename cusip_id cusip
rename yield trace_median_yield
save "$tmp/trace_enhanced_clean_yields", replace
    
* also process trace standard (for latest few months not covered in enhanced)
use "$raw/trace/trace_standard_clean", clear
keep if trd_exctn_dt > td(30jun2020)
keep cusip_id bond_sym_id trd_exctn_dt trd_exctn_tm rptd_pr yld_pt
drop if missing(cusip)
gsort cusip trd_exctn_dt
by cusip_id trd_exctn_dt: egen yield = median(yld_pt)
by cusip_id trd_exctn_dt: keep if _n == 1
keep cusip_id trd_exctn_dt yield
rename trd_exctn_dt date
rename cusip_id cusip
rename yield trace_median_yield
save "$tmp/trace_standard_clean_yields", replace

* append the two
use "$tmp/trace_enhanced_clean_yields", clear
append using "$tmp/trace_standard_clean_yields"
save "$tmp/trace_clean_yields", replace

* collapse to a weekly frequency
use "$tmp/trace_clean_yields", clear
gen date_w = wofd(date)
format %tw date_w
gsort cusip date_w date
by cusip date_w: keep if _n == _N
drop date
save "$tmp/trace_clean_yields_w", replace

* collapse to a monthly frequency
use "$tmp/trace_clean_yields", clear
gen date_m = mofd(date)
format %tm date_m
gsort cusip date_m date
by cusip date_m: keep if _n == _N
drop date
save "$tmp/trace_clean_yields_m", replace

* --------------------------------------
* Raw returns
* --------------------------------------

* prepare to compute returns
use "$tmp/trace_clean_prices_w", clear
mmerge cusip using "$tmp/interest_details_consolidated", unmatched(m)
drop _merge
gsort cusip date_w
by cusip: gen prev_date_w = date_w[_n-1]

gen first_interest_date_w = wofd(first_interest_date)
gen maturity_date_w = wofd(maturity_date)
format %tw prev_date_w first_interest_date_w maturity_date_w

* interest frequency to number of coupons per year
gen ncoups = interest_frequency
replace ncoups = 0 if ncoups < 0 | ncoups > 12 // variable frequencies

* number of weeks since first interest date
gen coup_weeks = date_w - first_interest_date_w

* calculate next coupon date
cap drop coup_gap
gen nextcoup = first_interest_date_w if date_w <= first_interest_date_w
gen coup_gap = (1 + floor(coup_weeks * ncoups / 48)) * 48 / ncoups
replace nextcoup = first_interest_date_w + coup_gap if date_w > first_interest_date_w & maturity_date_w >= date_w & ncoups > 0
replace nextcoup = maturity_date_w if nextcoup > 0 & maturity_date_w < nextcoup
format %tw nextcoup

* coupon amount, accrued interest
cap drop coup_amt 
cap drop coup_acc
cap drop prevcoup
gen coup_amt = coupon / ncoups * (mod(coup_weeks, 48 / ncoups) == 0) if coup_weeks >= 0 & ncoups > 0 & date_w >= first_interest_date_w
gen prevcoup = nextcoup - 48 / ncoups
replace prevcoup = first_interest_date_w if prevcoup < first_interest_date_w
gen coup_acc = coupon * (date_w - prevcoup) * 7 / 365 if coup_weeks >= 0 & ncoups > 0 & date_w >= first_interest_date_w
replace coup_acc = 0 if missing(coup_acc)
replace coup_amt = 0 if missing(coup_amt)
replace coup_amt = 0 if date_w > maturity_date_w | coup_amt < 0
replace coup_acc = 0 if date_w > maturity_date_w | coup_acc < 0

* multi-coupon corner cases
cap drop weeks_gap
cap drop multi_coups
gen weeks_gap = date_w - prev_date_w
replace weeks_gap = 0 if missing(weeks_gap)
gen multi_coups = 1 + floor((weeks_gap - 1) * ncoups / 48) if weeks_gap > 1
replace multi_coups = 1 if missing(multi_coups)

* lagged values
gsort cusip date_w
by cusip: gen prev_coup_acc = coup_acc[_n-1]
by cusip: gen prev_trace_median_price = trace_median_price[_n-1]

* returns
gen net_return = (trace_median_price + coup_acc + coup_amt * multi_coups) / (prev_trace_median_price + prev_coup_acc) - 1 if ~missing(prev_trace_median_price) & prev_trace_median_price > 0
drop if missing(net_return)
rename net_return net_return_raw
winsor2 net_return_raw, cuts(.1 99.9) trim
rename net_return_raw_tr net_return

* saving
keep cusip date_w trace_median_price prev_date_w weeks_gap net_return_raw net_return trace_median_price_w
save "$tmp/trace_enhanced_clean_returns_w", replace
